import pandas as pd
# Registry of the pandas DataFrames used by this script, keyed by a short name.
df = {}
from clustergrammer2 import net
import ipywidgets as widgets
import numpy as np
from bqplot import pyplot as plt
import bqplot
from glob import glob
# Load the CODEX cell-type annotation sheet; its first column becomes the index.
df['cell_type'] = pd.read_excel('../data/big_data/mmc2__codex_cell_type_info.xlsx', index_col=0)
df['cell_type'].shape

# Map every index label to its imaging phenotype (cell type).  A single
# ``Series.to_dict()`` call replaces the per-row ``.loc`` lookup loop.
ct_dict = df['cell_type']['Imaging phenotype (cell type)'].to_dict()

# Sorted list of the distinct cell-type names.
cell_types = sorted(set(ct_dict.values()))
print(len(cell_types))
cell_types
# Plot extent parameters: the Voronoi figure axes later span
# 0..2*x_dim horizontally and 0..2*y_dim vertically.
x_dim, y_dim = 1200, 1000

# List the text and CSV files present in the data directory.
glob('../data/big_data/*.txt')
glob('../data/big_data/*.csv')
# See http://welikesharingdata.blob.core.windows.net/forshare/index.html
%%time
# Load the per-cell expression table and label every row 'C-<original index>'.
df['exp'] = pd.read_csv('../data/big_data/Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv')
new_rows = ['C-' + str(x) for x in df['exp'].index.tolist()]
df['exp'].index = new_rows
print(df['exp'].shape)
df['exp'].head()
df['exp'].columns.tolist()

# The slide name is the part of 'sample_Xtile_Ytile' before the first '_'.
# ``Series.get_values()`` was removed in pandas 1.0, so ``tolist()`` is used.
sample_list = [x.split('_')[0] for x in df['exp']['sample_Xtile_Ytile'].tolist()]
ser_sample = pd.Series(sample_list, name='sample_slide', index=df['exp'].index.tolist())

# Distinct slide names, sorted.
list_slides = sorted(set(sample_list))
print(len(list_slides))
list_slides

# Store the slide name as an extra column and show how many cells each has.
df['exp']['sample_slide'] = ser_sample
ser_sample.value_counts()

cols = df['exp'].columns.tolist()
cols

# Columns 1..29 are used as the expression columns.
exp_cols = cols[1:30]
exp_cols
# Collect the distinct values of every column and print how many there are.
unique_dict = {}
for inst_col in cols:
    unique_dict[inst_col] = list(df['exp'][inst_col].unique())
    print(inst_col, len(unique_dict[inst_col]))
# BALBc: normal tissue. MRL/lpr: spleen from animals with systemic autoimmune disease.
# Start with tile 'BALBc-1_X01_Y01'.
# Stitch four neighbouring BALBc-1 tiles into one frame whose columns are cells.
df_list = []
ser_tile = df['exp']['sample_Xtile_Ytile']
for inst_tile in ['BALBc-1_X01_Y01', 'BALBc-1_X02_Y01', 'BALBc-1_X01_Y02', 'BALBc-1_X02_Y02']:
    # Select the rows (cells) of this tile and transpose so cells become columns.
    ser_found = ser_tile[ser_tile == inst_tile]
    keep_rows = ser_found.index.tolist()
    inst_df = df['exp'].loc[keep_rows].transpose()
    print('inst_df', inst_df.shape)

    # Tiles in the X02 column are shifted right by 1350.
    if 'X02' in inst_tile:
        inst_df.loc['X.X'] = inst_df.loc['X.X'] + 1350

    # Y is flipped: Y01 tiles become 2000 - Y, Y02 tiles become 1000 - Y.
    for y_tag, y_origin in (('Y01', 2000), ('Y02', 1000)):
        if y_tag in inst_tile:
            inst_df.loc['Y.Y'] = y_origin - inst_df.loc['Y.Y']

    df_list.append(inst_df)

# Place the tiles side by side (column-wise concatenation).
df['tile'] = pd.concat(df_list, axis=1)
print(df['tile'].shape)
# Translate each cell's phenotype cluster id into its cell-type name.
cats = df['tile'].loc['Imaging phenotype cluster ID']
cats = [ct_dict[x] for x in cats]

# Relabel every column as a (column label, 'Cell Type: <name>') tuple.
cols = df['tile'].columns.tolist()
new_cols = [
    (inst_label, 'Cell Type: ' + str(inst_cat))
    for inst_label, inst_cat in zip(cols, cats)
]
df['tile'].columns = new_cols
df['tile'].shape
# Keep only the expression rows of the stitched tile frame.
df['tile-exp-ini'] = df['tile'].loc[exp_cols]
df['tile-exp-ini'].shape

# Work on a copy: without it 'tile-exp' and 'tile-exp-ini' are the same object,
# so the in-place clipping below would also overwrite the initial values (and
# would be writing into a slice taken from df['tile']).
df['tile-exp'] = df['tile-exp-ini'].copy()

# Floor negative values at 0.
df['tile-exp'][df['tile-exp'] < 0] = 0
df['tile-exp'].transpose().describe()

# Cap values at 5000.
df['tile-exp'][df['tile-exp'] > 5000] = 5000
df['tile-exp'].shape
df['tile'].loc['Z.Z'].head()

# Cell positions: one row per cell with integer 'X.X' and 'Y.Y' columns.
df['tile-loc'] = df['tile'].loc[['X.X', 'Y.Y']].transpose()
df['tile-loc'].shape
df['tile-loc'] = df['tile-loc'].astype('int')
def set_expression_opacity(inst_gene):
    """Set the scatter mark's per-point opacity from one expression row.

    Reads row ``inst_gene`` of ``df['tile-exp']``, divides every value by the
    row maximum and assigns the resulting list to the module-level
    ``scatter.default_opacities``.

    Args:
        inst_gene: Row label in ``df['tile-exp']`` (for example ``'NKp46'``).

    Raises:
        KeyError: If ``inst_gene`` is not a row label of ``df['tile-exp']``.
    """
    ser_opacity = df['tile-exp'].loc[inst_gene]
    max_value = ser_opacity.max()

    # ``Series.get_values()`` was removed in pandas 1.0; ``tolist()`` works on
    # old and new versions alike.
    values = ser_opacity.tolist()

    if max_value > 0:
        list_opacity = [float(x / max_value) for x in values]
    else:
        # A row whose maximum is 0 (or NaN) cannot be normalised; make every
        # point fully transparent instead of dividing by zero.
        list_opacity = [0.0] * len(values)

    scatter.default_opacities = list_opacity
# Scatter plot of the cell positions; the tooltip shows each point's name.
fig = plt.figure(title='Scatter')
def_tt = bqplot.Tooltip(fields=['name'], formats=[''])

cell_x = df['tile-loc']['X.X']
cell_y = df['tile-loc']['Y.Y']
scatter = plt.scatter(
    cell_x,
    cell_y,
    figsize=(20, 10),
    ylim=(0, 1000),
    xlim=(0, 1000),
    stroke='black',
    tooltip=def_tt,
    names=df['tile-loc'].index.tolist(),
    display_names=False,
)

# Minimum widget size in pixels (height is width / 1.2).
inst_width = 900
fig.layout.min_height = '{}px'.format(inst_width / 1.2)
fig.layout.min_width = '{}px'.format(inst_width)

# Shade points by NKp46 expression, then set marker size and colour.
set_expression_opacity('NKp46')
scatter.default_size = 100
scatter.colors = ['red']
# Hex colour for every cell-type name.  The trailing letter tags (A..Z, then
# '[') are carried over from the original per-entry comments.
cat_colors = {
    'NK cells': '#FB0006',                                  # A
    'granulocytes': '#FA1400',                              # B
    'CD4(-)CD8(-) cDC': '#FC4B08',                          # C
    'B220(+) DN T cells': '#FD8007',                        # D
    'plasma cells': '#FDBA0A',                              # E
    'F4/80(+) mphs': '#FFF80B',                             # F
    'FDCs': '#FC9CA0',                                      # G
    'CD11c(+) B cells': '#99FF06',                          # H
    'capsule': '#68FF0A',                                   # I
    'marginal zone mphs': '#0A4600',                        # J
    'noid': '#25FF04',                                      # K
    'B cells': '#FFFF09',                                   # L
    'erythroblasts': '#1FFF3C',                             # M
    'CD106(+)CD16/32(+)CD31(+) stroma': '#23FF6D',          # N
    'CD4(-)CD8(+)cDC': '#23FFA3',                           # O
    'CD106(-)CD16/32(-)Ly6C(+)CD31(+) stroma': '#20FFDD',   # P
    'megakaryocytes': '#1CE5FF',                            # Q
    'CD106(-)CD16/32(+)Ly6C(+)CD31(-)': '#15A7FF',          # R
    'CD4(+) T cells': '#0D6FFF',                            # S
    'CD4(+)MHCII(+)': '#0137FF',                            # T
    'CD31(hi) vascular': '#0000FF',                         # U
    'CD3(+) other markers (-)': '#0700FF',                  # V
    'CD106(+)CD16/32(+)CD31(-)Ly6C(-) stroma': '#2D00FF',   # W
    'CD8(+) T cells': '#5900FF',                            # X
    'ERTR7(+) stroma': '#8C00FE',                           # Y
    'CD106(+)CD16/32(-)Ly6C(+)CD31(+)': '#C300FF',          # Z
    'CD4(+)CD8(-)cDC': '#FB00FA',                           # [
    'dirt': '#BCBCBC',                                      # dirt
}
# Drop every column whose category label (second tuple element) mentions 'dirt'.
all_cols = df['tile-exp'].columns.tolist()
keep_cols = [inst_col for inst_col in all_cols if 'dirt' not in inst_col[1]]
df['tile-exp-clean'] = df['tile-exp'][keep_cols]
df['tile-exp-clean'].shape

# Load the cleaned matrix, register the cell-type colours for the column
# category at index 1, load the matrix again, then show the widget.
# The call order is kept exactly as in the original.
net.load_df(df['tile-exp-clean'])
net.set_cat_colors(axis='col', cat_colors=cat_colors, cat_index=1, cat_title='Cell Type')
net.load_df(df['tile-exp-clean'])
net.widget()
from scipy.spatial import Voronoi

# Voronoi tessellation of the cell positions.
vor = Voronoi(df['tile-loc'])

# Each index entry is a (cell name, 'Cell Type: <name>') tuple.
point_list = df['tile-loc'].index.tolist()
point_names = [x[0] for x in point_list]
cat_names = [x[1].split(': ')[1] for x in point_list]

# Polygon outlines and fill colours, one entry per closed Voronoi region.
patch_data = {'x': [], 'y': [], 'colors': []}
region_labels = []

# Region index -> index of the input point that owns the region.
region_point_dict = {
    region_index: point_index
    for point_index, region_index in enumerate(vor.point_region)
}

for region_index, inst_region in enumerate(vor.regions):
    # Skip empty regions and open regions (those containing vertex -1).
    if len(inst_region) == 0 or -1 in inst_region:
        continue

    point_index = region_point_dict[region_index]
    point_cat = cat_names[point_index]
    region_labels.append(point_cat)

    # Fill colour comes from the owning cell's type.
    inst_color = cat_colors[point_cat]
    patch_data['colors'].append(inst_color)

    # Polygon vertices of this region, in the order given by the region.
    x_list = [vor.vertices[inst_vertex][0] for inst_vertex in inst_region]
    y_list = [vor.vertices[inst_vertex][1] for inst_vertex in inst_region]
    patch_data['x'].append(x_list)
    patch_data['y'].append(y_list)
import bqplot.pyplot as plt

fig = plt.figure(animation_duration=1000)

# Filled, closed line mark for the Voronoi regions; created empty and given
# its coordinates further down.
hidden_axes = {'x': {'visible': False}, 'y': {'visible': False}}
patch = plt.plot(
    [],
    [],
    fill='inside',
    fill_colors=patch_data['colors'],
    stroke_width=1,
    close_path=True,
    labels=region_labels,
    tooltip=def_tt,
    axes_options=hidden_axes,
)

# Small markers at the cell positions, drawn on the same figure.
scatter = plt.scatter(
    df['tile-loc']['X.X'],
    df['tile-loc']['Y.Y'],
    names=point_names,
    display_names=False,
    default_size=2,
)

# Minimum widget size in pixels (height is width / 1.15).
inst_width = 950
fig.layout.min_height = '{}px'.format(inst_width / (1.15))
fig.layout.min_width = '{}px'.format(inst_width)

# Supply the polygon coordinates and set the axis limits.
patch.x = patch_data['x']
patch.y = patch_data['y']
plt.xlim(0, 2.0 * x_dim)
plt.ylim(0, 2.0 * y_dim)

fig
def mouseover_highlight(self, target):
    """Highlight the regions whose label matches the hovered element.

    Regions whose entry in ``region_labels`` equals ``target['data']['name']``
    get opacity 1; all others get 0.25.  The list is assigned to
    ``self.opacities``.
    """
    self.opacities = [
        1 if inst_label == target['data']['name'] else 0.25
        for inst_label in region_labels
    ]
def reset_highlight(self, target):
    """Give every region full opacity again (one entry per region label)."""
    self.opacities = [1] * len(region_labels)
# Hovering highlights the regions that share the hovered label; clicking an
# element restores full opacity everywhere.
patch.on_hover(mouseover_highlight)
patch.on_element_click(reset_highlight)